{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data from http://grouplens.org/datasets/movielens/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "data_folder = os.path.join(os.path.expanduser(\"~\"), \"Data\", \"ml-100k\")\n",
    "ratings_filename = os.path.join(data_folder, \"u.data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Datetime</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td> 196</td>\n",
       "      <td> 242</td>\n",
       "      <td> 3</td>\n",
       "      <td>1997-12-04 15:55:49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td> 186</td>\n",
       "      <td> 302</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-04-04 19:22:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>  22</td>\n",
       "      <td> 377</td>\n",
       "      <td> 1</td>\n",
       "      <td>1997-11-07 07:18:36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td> 244</td>\n",
       "      <td>  51</td>\n",
       "      <td> 2</td>\n",
       "      <td>1997-11-27 05:02:03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td> 166</td>\n",
       "      <td> 346</td>\n",
       "      <td> 1</td>\n",
       "      <td>1998-02-02 05:33:16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   UserID  MovieID  Rating            Datetime\n",
       "0     196      242       3 1997-12-04 15:55:49\n",
       "1     186      302       3 1998-04-04 19:22:22\n",
       "2      22      377       1 1997-11-07 07:18:36\n",
       "3     244       51       2 1997-11-27 05:02:03\n",
       "4     166      346       1 1998-02-02 05:33:16"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_ratings = pd.read_csv(ratings_filename, delimiter=\"\\t\", header=None, names = [\"UserID\", \"MovieID\", \"Rating\", \"Datetime\"])\n",
    "all_ratings[\"Datetime\"] = pd.to_datetime(all_ratings['Datetime'],unit='s')\n",
    "all_ratings[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Datetime</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>81098</th>\n",
       "      <td> 675</td>\n",
       "      <td>   86</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:26:14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90696</th>\n",
       "      <td> 675</td>\n",
       "      <td>  223</td>\n",
       "      <td> 1</td>\n",
       "      <td>1998-03-10 00:35:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92650</th>\n",
       "      <td> 675</td>\n",
       "      <td>  235</td>\n",
       "      <td> 1</td>\n",
       "      <td>1998-03-10 00:35:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95459</th>\n",
       "      <td> 675</td>\n",
       "      <td>  242</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:08:42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82845</th>\n",
       "      <td> 675</td>\n",
       "      <td>  244</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-03-10 00:29:35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53293</th>\n",
       "      <td> 675</td>\n",
       "      <td>  258</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-03-10 00:11:19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97286</th>\n",
       "      <td> 675</td>\n",
       "      <td>  269</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:08:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93720</th>\n",
       "      <td> 675</td>\n",
       "      <td>  272</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-03-10 00:07:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73389</th>\n",
       "      <td> 675</td>\n",
       "      <td>  286</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:07:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77524</th>\n",
       "      <td> 675</td>\n",
       "      <td>  303</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:08:42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47367</th>\n",
       "      <td> 675</td>\n",
       "      <td>  305</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:09:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44300</th>\n",
       "      <td> 675</td>\n",
       "      <td>  306</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:08:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53730</th>\n",
       "      <td> 675</td>\n",
       "      <td>  311</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-03-10 00:10:47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54284</th>\n",
       "      <td> 675</td>\n",
       "      <td>  312</td>\n",
       "      <td> 2</td>\n",
       "      <td>1998-03-10 00:10:24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63291</th>\n",
       "      <td> 675</td>\n",
       "      <td>  318</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:21:13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87082</th>\n",
       "      <td> 675</td>\n",
       "      <td>  321</td>\n",
       "      <td> 2</td>\n",
       "      <td>1998-03-10 00:11:48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56108</th>\n",
       "      <td> 675</td>\n",
       "      <td>  344</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:12:34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53046</th>\n",
       "      <td> 675</td>\n",
       "      <td>  347</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:07:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94617</th>\n",
       "      <td> 675</td>\n",
       "      <td>  427</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:28:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69915</th>\n",
       "      <td> 675</td>\n",
       "      <td>  463</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:16:43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46744</th>\n",
       "      <td> 675</td>\n",
       "      <td>  509</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:24:25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46598</th>\n",
       "      <td> 675</td>\n",
       "      <td>  531</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:18:28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52962</th>\n",
       "      <td> 675</td>\n",
       "      <td>  650</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:32:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>94029</th>\n",
       "      <td> 675</td>\n",
       "      <td>  750</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:08:07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53223</th>\n",
       "      <td> 675</td>\n",
       "      <td>  874</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:11:19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62277</th>\n",
       "      <td> 675</td>\n",
       "      <td>  891</td>\n",
       "      <td> 2</td>\n",
       "      <td>1998-03-10 00:12:59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77274</th>\n",
       "      <td> 675</td>\n",
       "      <td>  896</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:09:35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66194</th>\n",
       "      <td> 675</td>\n",
       "      <td>  900</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:10:24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54994</th>\n",
       "      <td> 675</td>\n",
       "      <td>  937</td>\n",
       "      <td> 1</td>\n",
       "      <td>1998-03-10 00:35:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61742</th>\n",
       "      <td> 675</td>\n",
       "      <td> 1007</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:25:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49225</th>\n",
       "      <td> 675</td>\n",
       "      <td> 1101</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-03-10 00:33:49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50692</th>\n",
       "      <td> 675</td>\n",
       "      <td> 1255</td>\n",
       "      <td> 1</td>\n",
       "      <td>1998-03-10 00:35:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>74202</th>\n",
       "      <td> 675</td>\n",
       "      <td> 1628</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:30:37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47866</th>\n",
       "      <td> 675</td>\n",
       "      <td> 1653</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-03-10 00:31:53</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       UserID  MovieID  Rating            Datetime\n",
       "81098     675       86       4 1998-03-10 00:26:14\n",
       "90696     675      223       1 1998-03-10 00:35:51\n",
       "92650     675      235       1 1998-03-10 00:35:51\n",
       "95459     675      242       4 1998-03-10 00:08:42\n",
       "82845     675      244       3 1998-03-10 00:29:35\n",
       "53293     675      258       3 1998-03-10 00:11:19\n",
       "97286     675      269       5 1998-03-10 00:08:07\n",
       "93720     675      272       3 1998-03-10 00:07:11\n",
       "73389     675      286       4 1998-03-10 00:07:11\n",
       "77524     675      303       5 1998-03-10 00:08:42\n",
       "47367     675      305       4 1998-03-10 00:09:08\n",
       "44300     675      306       5 1998-03-10 00:08:07\n",
       "53730     675      311       3 1998-03-10 00:10:47\n",
       "54284     675      312       2 1998-03-10 00:10:24\n",
       "63291     675      318       5 1998-03-10 00:21:13\n",
       "87082     675      321       2 1998-03-10 00:11:48\n",
       "56108     675      344       4 1998-03-10 00:12:34\n",
       "53046     675      347       4 1998-03-10 00:07:11\n",
       "94617     675      427       5 1998-03-10 00:28:11\n",
       "69915     675      463       5 1998-03-10 00:16:43\n",
       "46744     675      509       5 1998-03-10 00:24:25\n",
       "46598     675      531       5 1998-03-10 00:18:28\n",
       "52962     675      650       5 1998-03-10 00:32:51\n",
       "94029     675      750       4 1998-03-10 00:08:07\n",
       "53223     675      874       4 1998-03-10 00:11:19\n",
       "62277     675      891       2 1998-03-10 00:12:59\n",
       "77274     675      896       5 1998-03-10 00:09:35\n",
       "66194     675      900       4 1998-03-10 00:10:24\n",
       "54994     675      937       1 1998-03-10 00:35:51\n",
       "61742     675     1007       4 1998-03-10 00:25:22\n",
       "49225     675     1101       4 1998-03-10 00:33:49\n",
       "50692     675     1255       1 1998-03-10 00:35:51\n",
       "74202     675     1628       5 1998-03-10 00:30:37\n",
       "47866     675     1653       5 1998-03-10 00:31:53"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# As you can see, there are no review for most movies, such as #213\n",
    "all_ratings[all_ratings[\"UserID\"] == 675].sort(\"MovieID\")  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Datetime</th>\n",
       "      <th>Favorable</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>  62</td>\n",
       "      <td>  257</td>\n",
       "      <td> 2</td>\n",
       "      <td>1997-11-12 22:07:14</td>\n",
       "      <td> False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td> 286</td>\n",
       "      <td> 1014</td>\n",
       "      <td> 5</td>\n",
       "      <td>1997-11-17 15:38:45</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td> 200</td>\n",
       "      <td>  222</td>\n",
       "      <td> 5</td>\n",
       "      <td>1997-10-05 09:05:40</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td> 210</td>\n",
       "      <td>   40</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-03-27 21:59:54</td>\n",
       "      <td> False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td> 224</td>\n",
       "      <td>   29</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-02-21 23:40:57</td>\n",
       "      <td> False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    UserID  MovieID  Rating            Datetime Favorable\n",
       "10      62      257       2 1997-11-12 22:07:14     False\n",
       "11     286     1014       5 1997-11-17 15:38:45      True\n",
       "12     200      222       5 1997-10-05 09:05:40      True\n",
       "13     210       40       3 1998-03-27 21:59:54     False\n",
       "14     224       29       3 1998-02-21 23:40:57     False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Not all reviews are favourable! Our goal is \"other recommended books\", so we only want favourable reviews\n",
    "all_ratings[\"Favorable\"] = all_ratings[\"Rating\"] > 3\n",
    "all_ratings[10:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Datetime</th>\n",
       "      <th>Favorable</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td> 1</td>\n",
       "      <td>  61</td>\n",
       "      <td> 4</td>\n",
       "      <td>1997-11-03 07:33:40</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>305</th>\n",
       "      <td> 1</td>\n",
       "      <td> 189</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-03-01 06:15:28</td>\n",
       "      <td> False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>333</th>\n",
       "      <td> 1</td>\n",
       "      <td>  33</td>\n",
       "      <td> 4</td>\n",
       "      <td>1997-11-03 07:38:19</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td> 1</td>\n",
       "      <td> 160</td>\n",
       "      <td> 4</td>\n",
       "      <td>1997-09-24 03:42:27</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>478</th>\n",
       "      <td> 1</td>\n",
       "      <td>  20</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-02-14 04:51:23</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     UserID  MovieID  Rating            Datetime Favorable\n",
       "202       1       61       4 1997-11-03 07:33:40      True\n",
       "305       1      189       3 1998-03-01 06:15:28     False\n",
       "333       1       33       4 1997-11-03 07:38:19      True\n",
       "334       1      160       4 1997-09-24 03:42:27      True\n",
       "478       1       20       4 1998-02-14 04:51:23      True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_ratings[all_ratings[\"UserID\"] == 1][:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample the dataset. You can try increasing the size of the sample, but the run time will be considerably longer\n",
    "ratings = all_ratings[all_ratings['UserID'].isin(range(200))]  # & ratings[\"UserID\"].isin(range(100))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Datetime</th>\n",
       "      <th>Favorable</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td> 122</td>\n",
       "      <td> 387</td>\n",
       "      <td> 5</td>\n",
       "      <td>1997-11-11 17:47:39</td>\n",
       "      <td> True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td> 119</td>\n",
       "      <td> 392</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-01-30 16:13:34</td>\n",
       "      <td> True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td> 167</td>\n",
       "      <td> 486</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-04-16 14:54:12</td>\n",
       "      <td> True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>  38</td>\n",
       "      <td>  95</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-04-13 01:14:54</td>\n",
       "      <td> True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>  63</td>\n",
       "      <td> 277</td>\n",
       "      <td> 4</td>\n",
       "      <td>1997-10-01 23:10:01</td>\n",
       "      <td> True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    UserID  MovieID  Rating            Datetime Favorable\n",
       "16     122      387       5 1997-11-11 17:47:39      True\n",
       "20     119      392       4 1998-01-30 16:13:34      True\n",
       "21     167      486       4 1998-04-16 14:54:12      True\n",
       "26      38       95       5 1998-04-13 01:14:54      True\n",
       "28      63      277       4 1997-10-01 23:10:01      True"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We start by creating a dataset of each user's favourable reviews\n",
    "favorable_ratings = ratings[ratings[\"Favorable\"]]\n",
    "favorable_ratings[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "199"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We are only interested in the reviewers who have more than one review\n",
    "favorable_reviews_by_users = dict((k, frozenset(v.values)) for k, v in favorable_ratings.groupby(\"UserID\")[\"MovieID\"])\n",
    "len(favorable_reviews_by_users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Favorable</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MovieID</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>50 </th>\n",
       "      <td> 100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>  89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258</th>\n",
       "      <td>  83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181</th>\n",
       "      <td>  79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>  74</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Favorable\n",
       "MovieID           \n",
       "50             100\n",
       "100             89\n",
       "258             83\n",
       "181             79\n",
       "174             74"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Find out how many movies have favourable ratings\n",
    "num_favorable_by_movie = ratings[[\"MovieID\", \"Favorable\"]].groupby(\"MovieID\").sum()\n",
    "num_favorable_by_movie.sort(\"Favorable\", ascending=False)[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "def find_frequent_itemsets(favorable_reviews_by_users, k_1_itemsets, min_support):\n",
    "    counts = defaultdict(int)\n",
    "    for user, reviews in favorable_reviews_by_users.items():\n",
    "        for itemset in k_1_itemsets:\n",
    "            if itemset.issubset(reviews):\n",
    "                for other_reviewed_movie in reviews - itemset:\n",
    "                    current_superset = itemset | frozenset((other_reviewed_movie,))\n",
    "                    counts[current_superset] += 1\n",
    "    return dict([(itemset, frequency) for itemset, frequency in counts.items() if frequency >= min_support])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 16 movies with more than 50 favorable reviews\n",
      "I found 93 frequent itemsets of length 2\n",
      "I found 295 frequent itemsets of length 3\n",
      "I found 593 frequent itemsets of length 4\n",
      "I found 785 frequent itemsets of length 5\n",
      "I found 677 frequent itemsets of length 6\n",
      "I found 373 frequent itemsets of length 7\n",
      "I found 126 frequent itemsets of length 8\n",
      "I found 24 frequent itemsets of length 9\n",
      "I found 2 frequent itemsets of length 10\n",
      "Did not find any frequent itemsets of length 11\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "frequent_itemsets = {}  # itemsets are sorted by length\n",
    "min_support = 50\n",
    "\n",
    "# k=1 candidates are the isbns with more than min_support favourable reviews\n",
    "frequent_itemsets[1] = dict((frozenset((movie_id,)), row[\"Favorable\"])\n",
    "                                for movie_id, row in num_favorable_by_movie.iterrows()\n",
    "                                if row[\"Favorable\"] > min_support)\n",
    "\n",
    "print(\"There are {} movies with more than {} favorable reviews\".format(len(frequent_itemsets[1]), min_support))\n",
    "sys.stdout.flush()\n",
    "for k in range(2, 20):\n",
    "    # Generate candidates of length k, using the frequent itemsets of length k-1\n",
    "    # Only store the frequent itemsets\n",
    "    cur_frequent_itemsets = find_frequent_itemsets(favorable_reviews_by_users, frequent_itemsets[k-1],\n",
    "                                                   min_support)\n",
    "    if len(cur_frequent_itemsets) == 0:\n",
    "        print(\"Did not find any frequent itemsets of length {}\".format(k))\n",
    "        sys.stdout.flush()\n",
    "        break\n",
    "    else:\n",
    "        print(\"I found {} frequent itemsets of length {}\".format(len(cur_frequent_itemsets), k))\n",
    "        #print(cur_frequent_itemsets)\n",
    "        sys.stdout.flush()\n",
    "        frequent_itemsets[k] = cur_frequent_itemsets\n",
    "# We aren't interested in the itemsets of length 1, so remove those\n",
    "del frequent_itemsets[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found a total of 2968 frequent itemsets\n"
     ]
    }
   ],
   "source": [
    "print(\"Found a total of {0} frequent itemsets\".format(sum(len(itemsets) for itemsets in frequent_itemsets.values())))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 15285 candidate rules\n"
     ]
    }
   ],
   "source": [
    "# Now we create the association rules. First, they are candidates until the confidence has been tested\n",
    "candidate_rules = []\n",
    "for itemset_length, itemset_counts in frequent_itemsets.items():\n",
    "    for itemset in itemset_counts.keys():\n",
    "        for conclusion in itemset:\n",
    "            premise = itemset - set((conclusion,))\n",
    "            candidate_rules.append((premise, conclusion))\n",
    "print(\"There are {} candidate rules\".format(len(candidate_rules)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[(frozenset({79}), 258), (frozenset({258}), 79), (frozenset({50}), 64), (frozenset({64}), 50), (frozenset({127}), 181)]\n"
     ]
    }
   ],
   "source": [
    "print(candidate_rules[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now, we compute the confidence of each of these rules. This is very similar to what we did in chapter 1\n",
    "correct_counts = defaultdict(int)\n",
    "incorrect_counts = defaultdict(int)\n",
    "for user, reviews in favorable_reviews_by_users.items():\n",
    "    for candidate_rule in candidate_rules:\n",
    "        premise, conclusion = candidate_rule\n",
    "        if premise.issubset(reviews):\n",
    "            if conclusion in reviews:\n",
    "                correct_counts[candidate_rule] += 1\n",
    "            else:\n",
    "                incorrect_counts[candidate_rule] += 1\n",
    "rule_confidence = {candidate_rule: correct_counts[candidate_rule] / float(correct_counts[candidate_rule] + incorrect_counts[candidate_rule])\n",
    "              for candidate_rule in candidate_rules}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choose only rules above a minimum confidence level\n",
    "min_confidence = 0.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5152\n"
     ]
    }
   ],
   "source": [
    "# Filter out the rules with poor confidence\n",
    "rule_confidence = {rule: confidence for rule, confidence in rule_confidence.items() if confidence > min_confidence}\n",
    "print(len(rule_confidence))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "from operator import itemgetter\n",
    "sorted_confidence = sorted(rule_confidence.items(), key=itemgetter(1), reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rule #1\n",
      "Rule: If a person recommends frozenset({64, 56, 98, 50, 7}) they will also recommend 174\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #2\n",
      "Rule: If a person recommends frozenset({98, 100, 172, 79, 50, 56}) they will also recommend 7\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #3\n",
      "Rule: If a person recommends frozenset({98, 172, 181, 174, 7}) they will also recommend 50\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #4\n",
      "Rule: If a person recommends frozenset({64, 98, 100, 7, 172, 50}) they will also recommend 174\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #5\n",
      "Rule: If a person recommends frozenset({64, 1, 7, 172, 79, 50}) they will also recommend 181\n",
      " - Confidence: 1.000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for index in range(5):\n",
    "    print(\"Rule #{0}\".format(index + 1))\n",
    "    (premise, conclusion) = sorted_confidence[index][0]\n",
    "    print(\"Rule: If a person recommends {0} they will also recommend {1}\".format(premise, conclusion))\n",
    "    print(\" - Confidence: {0:.3f}\".format(rule_confidence[(premise, conclusion)]))\n",
    "    print(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Even better, we can get the movie titles themselves from the dataset\n",
    "movie_name_filename = os.path.join(data_folder, \"u.item\")\n",
    "movie_name_data = pd.read_csv(movie_name_filename, delimiter=\"|\", header=None, encoding = \"mac-roman\")\n",
    "movie_name_data.columns = [\"MovieID\", \"Title\", \"Release Date\", \"Video Release\", \"IMDB\", \"<UNK>\", \"Action\", \"Adventure\",\n",
    "                           \"Animation\", \"Children's\", \"Comedy\", \"Crime\", \"Documentary\", \"Drama\", \"Fantasy\", \"Film-Noir\",\n",
    "                           \"Horror\", \"Musical\", \"Mystery\", \"Romance\", \"Sci-Fi\", \"Thriller\", \"War\", \"Western\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_movie_name(movie_id):\n",
    "    title_object = movie_name_data[movie_name_data[\"MovieID\"] == movie_id][\"Title\"]\n",
    "    title = title_object.values[0]\n",
    "    return title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Get Shorty (1995)'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_movie_name(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rule #1\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Pulp Fiction (1994), Silence of the Lambs, The (1991), Star Wars (1977), Twelve Monkeys (1995) they will also recommend Raiders of the Lost Ark (1981)\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #2\n",
      "Rule: If a person recommends Silence of the Lambs, The (1991), Fargo (1996), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977), Pulp Fiction (1994) they will also recommend Twelve Monkeys (1995)\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #3\n",
      "Rule: If a person recommends Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Return of the Jedi (1983), Raiders of the Lost Ark (1981), Twelve Monkeys (1995) they will also recommend Star Wars (1977)\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #4\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Silence of the Lambs, The (1991), Fargo (1996), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Star Wars (1977) they will also recommend Raiders of the Lost Ark (1981)\n",
      " - Confidence: 1.000\n",
      "\n",
      "Rule #5\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Toy Story (1995), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977) they will also recommend Return of the Jedi (1983)\n",
      " - Confidence: 1.000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for index in range(5):\n",
    "    print(\"Rule #{0}\".format(index + 1))\n",
    "    (premise, conclusion) = sorted_confidence[index][0]\n",
    "    premise_names = \", \".join(get_movie_name(idx) for idx in premise)\n",
    "    conclusion_name = get_movie_name(conclusion)\n",
    "    print(\"Rule: If a person recommends {0} they will also recommend {1}\".format(premise_names, conclusion_name))\n",
    "    print(\" - Confidence: {0:.3f}\".format(rule_confidence[(premise, conclusion)]))\n",
    "    print(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation using test data\n",
    "test_dataset = all_ratings[~all_ratings['UserID'].isin(range(200))]\n",
    "test_favorable = test_dataset[test_dataset[\"Favorable\"]]\n",
    "#test_not_favourable = test_dataset[~test_dataset[\"Favourable\"]]\n",
    "test_favorable_by_users = dict((k, frozenset(v.values)) for k, v in test_favorable.groupby(\"UserID\")[\"MovieID\"])\n",
    "#test_not_favourable_by_users = dict((k, frozenset(v.values)) for k, v in test_not_favourable.groupby(\"UserID\")[\"MovieID\"])\n",
    "#test_users = test_dataset[\"UserID\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UserID</th>\n",
       "      <th>MovieID</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Datetime</th>\n",
       "      <th>Favorable</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3 </th>\n",
       "      <td> 244</td>\n",
       "      <td>   51</td>\n",
       "      <td> 2</td>\n",
       "      <td>1997-11-27 05:02:03</td>\n",
       "      <td> False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5 </th>\n",
       "      <td> 298</td>\n",
       "      <td>  474</td>\n",
       "      <td> 4</td>\n",
       "      <td>1998-01-07 14:20:06</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7 </th>\n",
       "      <td> 253</td>\n",
       "      <td>  465</td>\n",
       "      <td> 5</td>\n",
       "      <td>1998-04-03 18:34:27</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8 </th>\n",
       "      <td> 305</td>\n",
       "      <td>  451</td>\n",
       "      <td> 3</td>\n",
       "      <td>1998-02-01 09:20:17</td>\n",
       "      <td> False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td> 286</td>\n",
       "      <td> 1014</td>\n",
       "      <td> 5</td>\n",
       "      <td>1997-11-17 15:38:45</td>\n",
       "      <td>  True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    UserID  MovieID  Rating            Datetime Favorable\n",
       "3      244       51       2 1997-11-27 05:02:03     False\n",
       "5      298      474       4 1998-01-07 14:20:06      True\n",
       "7      253      465       5 1998-04-03 18:34:27      True\n",
       "8      305      451       3 1998-02-01 09:20:17     False\n",
       "11     286     1014       5 1997-11-17 15:38:45      True"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_dataset[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "correct_counts = defaultdict(int)\n",
    "incorrect_counts = defaultdict(int)\n",
    "for user, reviews in test_favorable_by_users.items():\n",
    "    for candidate_rule in candidate_rules:\n",
    "        premise, conclusion = candidate_rule\n",
    "        if premise.issubset(reviews):\n",
    "            if conclusion in reviews:\n",
    "                correct_counts[candidate_rule] += 1\n",
    "            else:\n",
    "                incorrect_counts[candidate_rule] += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5152\n"
     ]
    }
   ],
   "source": [
    "test_confidence = {candidate_rule: correct_counts[candidate_rule] / float(correct_counts[candidate_rule] + incorrect_counts[candidate_rule])\n",
    "                   for candidate_rule in rule_confidence}\n",
    "print(len(test_confidence))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[((frozenset({64, 1, 7, 172, 79, 50}), 174), 1.0), ((frozenset({64, 258, 98, 7, 174, 181}), 172), 1.0), ((frozenset({64, 1, 98, 7, 79, 181, 56}), 174), 1.0), ((frozenset({64, 1, 98, 7, 172, 79, 181}), 174), 1.0), ((frozenset({64, 258, 98, 7, 174, 50, 181}), 172), 1.0)]\n"
     ]
    }
   ],
   "source": [
    "sorted_test_confidence = sorted(test_confidence.items(), key=itemgetter(1), reverse=True)\n",
    "print(sorted_test_confidence[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rule #1\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Pulp Fiction (1994), Silence of the Lambs, The (1991), Star Wars (1977), Twelve Monkeys (1995) they will also recommend Raiders of the Lost Ark (1981)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.909\n",
      "\n",
      "Rule #2\n",
      "Rule: If a person recommends Silence of the Lambs, The (1991), Fargo (1996), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977), Pulp Fiction (1994) they will also recommend Twelve Monkeys (1995)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.609\n",
      "\n",
      "Rule #3\n",
      "Rule: If a person recommends Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Return of the Jedi (1983), Raiders of the Lost Ark (1981), Twelve Monkeys (1995) they will also recommend Star Wars (1977)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.946\n",
      "\n",
      "Rule #4\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Silence of the Lambs, The (1991), Fargo (1996), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Star Wars (1977) they will also recommend Raiders of the Lost Ark (1981)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.971\n",
      "\n",
      "Rule #5\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Toy Story (1995), Twelve Monkeys (1995), Empire Strikes Back, The (1980), Fugitive, The (1993), Star Wars (1977) they will also recommend Return of the Jedi (1983)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.900\n",
      "\n",
      "Rule #6\n",
      "Rule: If a person recommends Toy Story (1995), Silence of the Lambs, The (1991), Fargo (1996), Raiders of the Lost Ark (1981), Godfather, The (1972) they will also recommend Pulp Fiction (1994)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.750\n",
      "\n",
      "Rule #7\n",
      "Rule: If a person recommends Godfather, The (1972), Silence of the Lambs, The (1991), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Twelve Monkeys (1995) they will also recommend Shawshank Redemption, The (1994)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.854\n",
      "\n",
      "Rule #8\n",
      "Rule: If a person recommends Pulp Fiction (1994), Toy Story (1995), Shawshank Redemption, The (1994), Godfather, The (1972) they will also recommend Silence of the Lambs, The (1991)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.870\n",
      "\n",
      "Rule #9\n",
      "Rule: If a person recommends Shawshank Redemption, The (1994), Fargo (1996), Return of the Jedi (1983), Raiders of the Lost Ark (1981), Fugitive, The (1993) they will also recommend Pulp Fiction (1994)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.756\n",
      "\n",
      "Rule #10\n",
      "Rule: If a person recommends Silence of the Lambs, The (1991), Fargo (1996), Empire Strikes Back, The (1980), Raiders of the Lost Ark (1981), Fugitive, The (1993), Star Wars (1977), Return of the Jedi (1983) they will also recommend Pulp Fiction (1994)\n",
      " - Train Confidence: 1.000\n",
      " - Test Confidence: 0.756\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for index in range(10):\n",
    "    print(\"Rule #{0}\".format(index + 1))\n",
    "    (premise, conclusion) = sorted_confidence[index][0]\n",
    "    premise_names = \", \".join(get_movie_name(idx) for idx in premise)\n",
    "    conclusion_name = get_movie_name(conclusion)\n",
    "    print(\"Rule: If a person recommends {0} they will also recommend {1}\".format(premise_names, conclusion_name))\n",
    "    print(\" - Train Confidence: {0:.3f}\".format(rule_confidence.get((premise, conclusion), -1)))\n",
    "    print(\" - Test Confidence: {0:.3f}\".format(test_confidence.get((premise, conclusion), -1)))\n",
    "    print(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "12px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
